* Session setup: start from an empty workspace and define the folder globals
* used by every section below.
clear
clear matrix
clear mata
set more off
* Require full variable names so a typo cannot silently match another variable.
set varabbrev off
* The former "set mem 100000000" line was removed: memory is managed
* automatically since Stata 12 and the setting is ignored. This file needs
* Stata 17 or later anyway (it uses the collect commands).

* Path to the main Data folder (relative to the current working directory).
global mystart  "Replication JPE\Data"

* Folder where the intermediate data files built by this do-file are stored.
global data "${mystart}\CPS\CPS Data Files\Intermediate Data Files\"

* Output folders for figures and tables (currently the same folder).
global out_figures "${mystart}\Tables and Figures\"
global out_tables "${mystart}\Tables and Figures\"

* Folder where the parameters passed to the model are stored.
global data_model "${mystart}\Parameters for Model\"


**** Basic cleaning data ******************************
* Builds the analysis sample from the raw IPUMS-CPS extract, constructs the
* education, marital-status, CPI-adjusted income, hours, health and
* labor-force variables, and saves the result as "${data}\CPS.dta", which is
* the file every later section of this do-file loads.
qui {
* Extract downloaded from http://cps.ipums.org/, converted to Stata format.
use "${mystart}\CPS\CPS Data Files\Raw CPS Data files\Extract 2022.dta", clear

keep if sex==1

* Wages and employment refer to last year, so age is shifted back by one.
* The reported age is kept in age_actual.
gen age_actual=age
replace age=age-1

keep if age>=25 & age<65

***keep adult civilian
keep if popstat==1
drop popstat

keep if relate==101 // keep hh head

*** asecwt is renamed to perwt, the weight name used in the rest of this file.
*** Observations with a zero or negative weight are dropped and the weight is
*** truncated to an integer.
rename asecwt perwt
drop if perwt<=0
replace perwt=int(perwt)

*** drop if attending high school or college full time or part time
drop if schlcoll>=1 & schlcoll<=4

keep if year>=2001 & year<=2013

* Map the higrade coding into educ codes where higrade is populated.
replace educ=73 if higrade==150
replace educ=110 if higrade>=151 & higrade<=181
replace educ=111 if higrade>=190 & higrade<=210

*** EDUCATION GROUPS ***
* use variable educ
gen education_group=.
* less than high-school
replace education_group=1 if educ<=72 & educ!=1 & educ!=.
* high-school degree
replace education_group=2 if educ==73
* some college but no Bachelor's degree (includes those with associate's degree in vocational programs)
replace education_group=3 if educ>=80 & educ<110
* has Bachelor's degree or more
replace education_group=4 if educ>=110 & educ<=125 & educ!=.
* educ==999 is the missing code
drop if educ==999


gen education_group_ma_or_more=1 if inlist(educ,123,124,125)

* college: 0 = high-school degree, 1 = Bachelor's or more, missing otherwise
gen college=0 if education_group==2
replace college=1 if education_group==4

label define col 0 "High School" 1 "College"
label values college col
label variable college "College"

* college_alt: three-way grouping used throughout the analysis
gen college_alt=1 if education_group==2 | education_group==1
replace college_alt=2 if education_group==3
replace college_alt=3 if education_group==4

label define ed_a 1 "High School or Less" 2 "Some College" 3 "College"
label values college_alt ed_a
label variable college_alt "Education"


* married: 1 if marst==1, 0 for marst codes 2-6, missing otherwise
gen married=0 if inlist(marst,2,3,4,5,6)
replace married=1 if marst==1
label define lab_m 0 "Single" 1 "Married"
label values married lab_m
label variable married "Marital Status"

***Adjust earnings by CPI
* Use CPI data from the Bureau of Labor Statistics: http://www.bls.gov/cpi/
* ftp://ftp.bls.gov/pub/special.requests/cpi/cpiai.txt
* The CPS asks about earnings and income from last year. Therefore the CPI
* entered against survey year T below is the CPI of year T-1.
* http://www.bls.gov/cpi/cpid1402.pdf
* Only survey years 2001-2013 survive the year filter above; the other rows
* are kept for reference.
gen CPI=.
replace CPI = 29.9    if year==1962
replace CPI = 30.2    if year==1963
replace CPI = 30.6    if year==1964
replace CPI = 31      if year==1965
replace CPI = 31.5    if year==1966
replace CPI = 32.4    if year==1967
replace CPI = 33.4    if year==1968
replace CPI = 34.8    if year==1969
replace CPI = 36.7    if year==1970
replace CPI = 38.8    if year==1971
replace CPI = 40.5    if year==1972
replace CPI = 41.8    if year==1973
replace CPI = 44.4    if year==1974
replace CPI = 49.3    if year==1975
replace CPI = 53.8    if year==1976
replace CPI = 56.9    if year==1977
replace CPI = 60.6    if year==1978
replace CPI = 65.2    if year==1979
replace CPI = 72.6    if year==1980
replace CPI = 82.4    if year==1981
replace CPI = 90.9    if year==1982
replace CPI = 96.5    if year==1983
replace CPI = 99.6    if year==1984
replace CPI = 103.9   if year==1985
replace CPI = 107.6   if year==1986
replace CPI = 109.6   if year==1987
replace CPI = 113.6   if year==1988
replace CPI = 118.3   if year==1989
replace CPI = 124     if year==1990
replace CPI = 130.7   if year==1991
replace CPI = 136.2   if year==1992
replace CPI = 140.3   if year==1993
replace CPI = 144.5   if year==1994
replace CPI = 148.2   if year==1995
replace CPI = 152.4   if year==1996
replace CPI = 156.9   if year==1997
replace CPI = 160.5   if year==1998
replace CPI = 163     if year==1999
replace CPI = 166.6   if year==2000
replace CPI = 172.2   if year==2001
replace CPI = 177.1   if year==2002
replace CPI = 179.9   if year==2003
replace CPI = 184     if year==2004
replace CPI = 188.9   if year==2005
replace CPI = 195.3   if year==2006
replace CPI = 201.6   if year==2007
replace CPI = 207.342 if year==2008
replace CPI = 215.303 if year==2009
replace CPI = 214.537 if year==2010
replace CPI = 218.056 if year==2011
replace CPI = 224.939 if year==2012
replace CPI = 229.594 if year==2013
replace CPI = 232.957 if year==2014
replace CPI = 236.736 if year==2015

* From here on, year is the year the income refers to (survey year minus 1).
replace year=year-1

* Rebase the CPI so that 2010 = 100 (218.056 is the value attached to income
* year 2010). The adjusted amounts below are therefore in 2010 dollars.
* NOTE(review): the previous comment also mentioned "2006 CPI adjusted
* dollars", which does not match this divisor - confirm against the paper.
replace CPI = CPI/218.056*100


* NOTE(review): incint is used as-is; confirm it carries no NIU/missing
* sentinel codes in this extract.
gen incint_CPI_adjusted = incint*100/ CPI

*INCWAGE indicates each respondent's total pre-tax wage and salary income--
*        that is, money received as an employee--for the previous calendar year.
* 999999 = NIU
* 999998 = missing
replace incwage=. if incwage==999999 | incwage==999998
gen  incwage_CPI_adjusted= incwage*100/ CPI

gen Tax_liability = fedtaxac + stataxac
gen Tax_liability_CPI_adj=Tax_liability*100/ CPI

gen taxinc_CPI_adj=taxinc*100/ CPI

*INCTOT indicates each respondent's total pre-tax personal income or losses
*         from all sources for the previous calendar year.
replace inctot=. if inctot==999999 | inctot==999998
gen inctotal_CPI_adjusted= inctot*100/CPI

* Annual hours = weeks worked last year x usual weekly hours (code 999 set to 0).
* The hourly wage is missing when annual hours are zero.
replace uhrsworkly=0 if uhrsworkly==999
gen hours=wkswork1*uhrsworkly
gen incwage_hourly=incwage_CPI_adjusted/hours

*HEALTH STATUS
keep if inlist(health,1,2,3,4,5)
* The rules are applied in order, so a later rule overrides an earlier one
* for observations that satisfy both.
gen health_status=0 if health==5 | (disabwrk==2 & diffcare==2)
replace health_status=1 if health==4 | health==3 | (disabwrk==2 | diffcare==2 | diffphys==2 | diffmob==2)
replace health_status=2 if (health==1 | health==2) & (disabwrk!=2 & diffcare!=2 & diffphys!=2 & diffmob!=2)

* labor_force, from annual hours and the inclugh flag:
*   1: hours < 520
*   2: 520-1500 hours, inclugh!=2      3: 520-1500 hours, inclugh==2
*   4: more than 1500 hours, inclugh!=2  5: more than 1500 hours, inclugh==2
gen labor_force=5 if hours>1500 & inclugh==2 & hours!=.
replace labor_force=4 if hours>1500 & inclugh!=2  & hours!=.
replace labor_force=3 if hours<=1500 & hours>=520 & inclugh==2
replace labor_force=2 if hours<=1500 & hours>=520 & inclugh!=2
replace labor_force=1 if hours<520 & hours>=0

drop if education_group==.
sort famid year

drop if race==200 |  race==805 | race==806 | race==807 // drop blacks
keep if hispan==0 // not hispanic.


* Marital status x presence of children. A missing nchild compares as larger
* than any number in Stata, so it is excluded explicitly; otherwise it would
* be counted as "with children".
gen Mar_kids = 1 if married==0 & nchild==0
replace Mar_kids = 2 if married==0 & nchild>0 & nchild<.
replace Mar_kids = 3 if married==1 & nchild==0
replace Mar_kids = 4 if married==1 & nchild>0 & nchild<.

label define mk 1 "Single, No Children" 2 "Single, w/ Children" 3 "Married, No Children" 4 "Married, w/ Children"
label values Mar_kids mk
save "${data}\CPS.dta", replace
}

* Figure 26
***** FAMILY SIZE BY AGE
* Plots mean family size of married heads by age and education, then fits a
* cubic in age per education group and writes the fitted values to
* Equiv.dta and parameters_fam_size.txt.
qui {
use "${data}\CPS.dta", clear
keep if age<65

* Defined for married heads with known nchild: two adults plus any children.
gen Fam_size = 2 + max(nchild, 0) if married==1 & nchild!=.

collapse (mean) Fam_size [aweight=perwt], by(college_alt age)

twoway ///
	(line Fam_size age if college_alt==1, yaxis(1) xaxis(1) lwidth(medthick) lcolor(edkblue)) ///
	(line Fam_size age if college_alt==2, lwidth(medthick) lcolor(cranberry) lpattern(dash)) ///
	(line Fam_size age if college_alt==3, lwidth(medthick) lcolor(cranberry)) ///
	, ///
	ylabel(2.0(0.5)4.00) ///
	xlabel(25(5)65) ///
	ytitle("Family Size") ///
	xtitle("Age") ///
	title("Average Family Size if Married, CPS", color(black)) ///
	legend(label(1 "HS or Less") label(2 "Some College") label(3 "College")) ///
	legend(col(3) pos(6) region(lcolor(gs16))) ///
	plotregion(margin(r+7 l+5) style(none)) ///
	graphregion(icolor(white) fcolor(gs14) margin(none)) ///
	graphregion(color(white)) bgcolor(white)
graph export "${out_tables}\Fam_size.eps", replace


* Smooth each education group's age profile with a cubic polynomial and keep
* the group's own fitted value in y.
gen age_sq=age^2
gen age_cub=age^3
gen y=.
forvalues g = 1/3 {
	reg Fam_size age age_sq age_cub if college_alt==`g'
	predict fit_`g'
	replace y=fit_`g' if college_alt==`g'
	drop fit_`g'
}


sort college_alt age

* Equivalence-scale file: one row per age x education, sorted by age.
preserve
keep college_alt age y
rename y equiv
sort age college_alt
save "${data}\Equiv.dta", replace
restore

* Model input: fitted values only, in college_alt-age order, no header row.
keep y
outsheet using "${data_model}\parameters_fam_size.txt", nolabel nonames replace

}


*** consumption floor - % receiving transfers and DI
** Tables 70, 71, 72 and Figure 25
* Builds indicators for receiving any government transfer (TR) and disability
* income (DI_indic1), exports Tables 71/72 (left panels) and plots the
* fraction receiving transfers by age and education.
qui{
* note this is info from last year, so it's appropriate to use age-1.
use "${data}\CPS.dta", clear

* Diagnostics only: nothing is displayed while this runs under qui.
table college_alt [aweight=perwt] , stat(fvpercent inclugh) stat(fvpercent hinsemp )
table [aweight=perwt] , stat(fvpercent inclugh)

*  careful with the coding of these variables.
* Not-in-universe (N.I.U.) codes are set to zero income.
*INCCHILD
*999999 = N.I.U. (Not in Universe)
replace incchild=0 if incchild==999999
*INCSSI
* 999999 = N.I.U. (Not in Universe).
replace incssi=0 if incssi==999999
* INCDISAB
* 9999999 = N.I.U. (Not in Universe).
replace incdisab=0 if incdisab==9999999
* INCWKCOM
* 999999 = N.I.U. (Not in Universe).
replace incwkcom=0 if incwkcom==999999
*INCWELFR
* 999999 = N.I.U. (Not in Universe).
replace incwelfr=0 if incwelfr==999999



* measure that includes disability
* TR = 1 if hinscaid==2 or any welfare, SSI or disability income is positive;
* TR = 0 only when hinscaid==1 and all three income amounts are zero.
gen TR=0 if   hinscaid==1  & incwelfr==0 &  incssi==0 & incdisab==0
replace TR=1 if hinscaid==2 | (incwelfr>0 & incwelfr!=.) | (incssi>0 & incssi!=.) | (incdisab>0  & incdisab!=.)

* value of transfers (CPI adjusted)
gen TR_value = (incwelfr+ incssi+ incdisab+ incchild	)	*100/ CPI
gen DI_value = (incssi+ incdisab)*100/ CPI
gen TR_value_non_DI = TR_value-DI_value
gen DI_ssdi = (incdisab)*100/ CPI


* DI recipient: positive SSI or disability income worth more than 500.
* A missing DI_value compares as larger than 500 in Stata, so it is excluded
* explicitly instead of being counted as a recipient. The zero rule uses <=500
* so that a value of exactly 500 is classified rather than left missing.
* (A further condition, incwkcom==0, was disabled by the original author.)
gen DI_indic1 = 1 if ((incssi>0 & incssi!=.) | ( incdisab>0 & incdisab!=. )) & DI_value>500 & DI_value!=.
replace DI_indic1 = 0 if  (incssi==0 & incdisab==0) | (DI_value<=500)

* Diagnostics only (silent under qui).
table education_group if age>=30 & age<=55, stat(mean TR  )
table education_group  if age>=30 & age<=55, stat(percent labor_force)
tab labor_force if education_group==1 & age>=30 & age<=55 [aweight=perwt]
tab labor_force if education_group==2 & age>=30 & age<=55 [aweight=perwt]


* Table 70 - Poor health benefits, singles
*  - c_min for those in poor H. We do singles, and then for married we multiply by a factor
bysort college_alt: tabstat TR_value [aweight=perwt] if  DI_value >500 & DI_value!=.  & married==0


label var DI_indic1 "DI"
label define lab_disab 0 "No DI" 1 "DI"
label values DI_indic1  lab_disab

label var TR "Any Gov Trans"
label define lab_tr 0 "No" 1 "Yes"
label values TR  lab_tr


* Table 71, left panel
table  college_alt  (married)   if age>=30 & age<=55  [aweight=perwt] ,  statistic(mean DI_indic1)   nformat(%5.2f)
collect title "Fraction receiving DI, CPS, Ages 30-55"
collect export "${out_tables}/CPS_TR3.tex", tableonly replace

* Table 72, left panel
table  college_alt  (married)   if age>=30 & age<=55  [aweight=perwt] ,  statistic(mean TR)  nformat(%5.2f)
collect title "Fraction receiving Gov Trans, CPS, Ages 30-55"
collect export "${out_tables}/CPS_TR4.tex", tableonly replace



keep if  age<64

* Figure 25, left
* One row per age, with the weighted mean of TR in TR1-TR3 (one per education group).
sort  college_alt age
collapse (mean)   TR [aweight=perwt], by( college_alt age) fast
reshape wide TR , i(age) j( college_alt)

 #delimit ;
line TR1 age , yaxis(1) xaxis(1) lwidth(medthick) lcolor(cranberry) 
|| line  TR2 age , lwidth(medthick) lcolor(cranberry)   lpattern(dash)
|| line  TR3 age , lwidth(medthick) lcolor(edkblue)  lpattern(dash)
  ||, 
  ylabel(0.0(.02).2)
  xlabel(25(5)65)
  ytitle("Fraction")
xtitle("Age")
   title("Fraction getting Gov Transfers, CPS", color(black))
  legend(label(1 "HS or Less") label(2 "Some College") label(3 "College")  )
  legend(col(3) pos(6) region(lcolor(gs16)))
plotregion(margin(r+7 l+5) style(none))
  graphregion(icolor(white) fcolor(gs14) margin(none ))
  graphregion(color(white)) bgcolor(white)
  ;
#delimit cr
graph export "${out_tables}\TR_Profile.eps", replace

}


* Table 65
*2 PT/FT wages
* Ratio of mean part-time to mean full-time hourly wages by education,
* ages 30-55, exported to Target_av_wages3.tex.
qui{
use "${data}\CPS.dta", clear
drop if labor_force==1
keep  if age>=30 & age<=55
drop if incwage_hourly<3.12
* Recode very high wages (to make same as MEPS where it's capped). Missing
* wages compare as larger than 180 in Stata, so they are excluded explicitly;
* without the guard they were turned into 80 and entered the means.
* NOTE(review): the threshold (180) and the assigned value (80) differ, so
* wages between 80 and 180 are left untouched - confirm which was intended.
replace  incwage_hourly = 80 if incwage_hourly> 180 & incwage_hourly<.
* PT_FT: 0 = labor_force 2 or 3, 1 = labor_force 4 or 5
gen PT_FT=0 if labor_force==2 | labor_force==3
replace PT_FT=1 if labor_force==4 | labor_force==5
* NOTE(review): these means are unweighted, unlike most other sections.
collapse (mean) incwage_hourly, by(college_alt PT_FT)
reshape wide incwage_hourly, i(college_alt) j(PT_FT)
gen ratio=incwage_hourly0/incwage_hourly1

label var ratio "PT/FT Wage Ratio"
table  (college_alt),  statistic(mean ratio) nototals nformat(%5.2f)
collect title "Average PT to FT Wages, ages 30-55, CPS"
collect export "${out_tables}/Target_av_wages3.tex", tableonly replace
}

* * Table 68 top panel - Wage distribution
* Mean hourly wage by within-education wage tercile for workers with
* labor_force 4 or 5, ages 40-50, exported to CPS_Wage_terc.tex.
qui{
use "${data}\CPS.dta", clear
keep if labor_force==4  | labor_force==5

* Trim the wage distribution. The upper drop also removes missing wages,
* because missing compares as larger than any number.
drop if incwage_hourly<3.35
drop if incwage_hourly> 100
replace incwage_hourly=94.34 if incwage_hourly>94.34 & incwage_hourly!=.

* The earlier version dropped everyone outside ages 30-35 and 50-55 at this
* point (a leftover age_group filter), which left only age 50 in the 40-50
* window used below. That filter and the unused min_wage, log_wages and
* age_group variables have been removed so the table covers ages 40-50 as
* its title states.

* xtile() is not an official egen function; presumably it comes from the
* egenmore package (SSC) - confirm it is installed.
egen wage_tercile = xtile(incwage_hourly) if age>=40 & age<=50, by(college_alt) n(3)
label var wage_tercile "Wage Tercile"
label define inc_t 1 "1st" 2 "2nd" 3 "3rd"
label values wage_tercile inc_t

* Table 68 top panel
table college_alt wage_tercile if  age>=40 & age<=50,  statistic(mean incwage_hourly) nototals nformat(%5.1f)
collect title "Average Wages by Terciles, ages 40-50, FT Workers, CPS"
collect export "${out_tables}/CPS_Wage_terc.tex", tableonly replace

}

* Table 68 bottom panel - Wage distribution
* Weighted percentiles of hourly wages by education for ages 40-50, saved to
* Wage_dist2.dta.
qui{
use "${data}\CPS.dta", clear

* Sample restrictions (independent row filters):
*   - ages 40-50 with a known education group
*   - hourly wage not below 3.5 (missing hourly wages are not dropped here)
*   - CPI-adjusted annual wage income at most 300000 (drops missing as well)
*   - labor_force 2-5 (part-time and full-time, kept together for now)
keep if inrange(age,40,50)
keep if college_alt!=.
drop if incwage_hourly<3.5
keep if incwage_CPI_adjusted<=300000
keep if inlist(labor_force,2,3,4,5)

* No trimming of the top 1% within education group is applied; the
* group-specific cutoffs the author tried (71, 96 and 203) remain disabled.

collapse ///
	(p5)  p5=incwage_hourly  ///
	(p25) p25=incwage_hourly ///
	(p50) p50=incwage_hourly ///
	(p75) p75=incwage_hourly ///
	(p90) p90=incwage_hourly ///
	(p95) p95=incwage_hourly ///
	(p99) p99=incwage_hourly ///
	[aweight=perwt], by(college_alt)

* data==0 flags these rows in the saved file.
gen data=0
rename college_alt education
save  "${data}\Wage_dist2.dta", replace
}


* save data for Figure 28 - earnings inequality by age
* Computes the Gini coefficient of CPI-adjusted wage income separately for
* each age 25-64, plots it, and saves one row per age to Gini.dta.
qui{
use "${data}\CPS.dta", clear

gen min_wage=.
replace min_wage=5.15 if year>=1997 & year<=2006
replace min_wage=5.85 if year==2007
replace min_wage=6.55 if year==2008
replace min_wage=7.25 if year==2009
replace min_wage=7.25 if year>=2010 & year!=.


* Blank out earnings whose implied hourly wage is above 81.45 or below the
* minimum wage, for everyone with labor_force above 1.
replace incwage_CPI_adjusted=. if incwage_hourly> 81.45 & labor_force>1
replace incwage_CPI_adjusted=. if incwage_hourly<min_wage & labor_force>1


* Trim earnings above the 98th percentile within each age; floor at zero.
bysort age : egen p98= pctile(incwage_CPI_adjusted), p(98) //college_alt
replace incwage_CPI_adjusted = . if incwage_CPI_adjusted>p98
replace incwage_CPI_adjusted = 0 if incwage_CPI_adjusted<0


* GINI stays numeric throughout. The earlier version converted it to a string
* before the loop and back afterwards; assigning the numeric r(gini) to a
* string variable is a type mismatch, and the round trip served no purpose.
* inequal7 is not an official Stata command (presumably the SSC package) -
* confirm it is installed.
gen double GINI=.
forvalues a = 25/64 {
	inequal7 incwage_CPI_adjusted [aweight=perwt] if  age==`a'
	replace GINI=r(gini) if  age==`a'
}

collapse (mean)  GINI* , by(age)


		#delimit ;
				 line  GINI age  , lwidth(medthick)  lcolor(cranberry) 
				  ||, 
				  ylabel(0.3(.1).8)
				  xlabel(25(5)65)
				  ytitle("Income Gini")
				  xtitle("Age")
				  title("Income Gini Coefficients in CPS, by Age", color(black))
				  legend(label(1 "CPS")   )
  legend(col(2) pos(6) region(lcolor(gs16)))
plotregion(margin(r+7 l+5) style(none))
  graphregion(icolor(white) fcolor(gs14) margin(none ))
  graphregion(color(white)) bgcolor(white)
				  ;
				#delimit cr
sort age
save  "${data}\Gini.dta", replace
}



* Table 55 - Employment distn at age 25
* Weighted share of each labor_force category within education group for
* ages 25-26. The result is left in memory as a single column of shares
* ordered by college_alt and labor_force.
* NOTE(review): nothing is saved or exported here and the next section clears
* the data - confirm whether an export step is missing.
qui{
use "${data}\CPS.dta", clear
keep if age==25 | age==26
* With aweights, collapse (count) returns the unweighted number of
* observations, so the shares ignored perwt. With pweights it returns the sum
* of the weights, which is what a weighted distribution needs.
collapse (count) number=serial [pweight=perwt], by(college_alt labor_force) fast
reshape wide number, i(college_alt) j(labor_force)
egen total=rowtotal(number1 number2 number3 number4 number5)
* Convert the weighted counts to percentages of the education-group total.
foreach v of varlist number1 number2 number3 number4 number5 {
	replace `v'=`v'/total*100
}
drop total
reshape long number , i(college_alt) j(emp)
sort college_alt emp

* Express as fractions rather than percentages.
replace number=number/100
drop college_alt emp
}


* For Figures 21 and 22 - Employment by age
* Weighted percentage of each labor_force category by age and education,
* saved to "CPS LS Ins.dta" for comparison graphs.
qui{
use "${data}\CPS.dta", clear
* With aweights, collapse (count) returns the unweighted number of
* observations, so the shares ignored perwt. With pweights it returns the sum
* of the weights, which is what a weighted distribution needs.
collapse (count) number=serial [pweight=perwt], by(college_alt age labor_force) fast
reshape wide number, i(age college_alt) j(labor_force)
egen total=rowtotal(number1 number2 number3 number4 number5)
* Convert the weighted counts to percentages of the age x education total.
foreach v of varlist number1 number2 number3 number4 number5 {
	replace `v'=`v'/total*100
}
drop total


* % by educ working with and without insurance - will do graphs compared to data
* After these lines: number2 = part-time (old 2 + 3), number3 = old 4
* (FT, no ins), number4 = old 5; number5 is left unchanged.
replace number2 = number2+  number3 // PT
replace number3 = number4 // FT, no ins
replace number4 = number5
rename college_alt education
sort age education
save "${data}\CPS LS Ins.dta", replace

}


qui{
* Weighted variance of log hourly wages by age and education for workers with
* labor_force 4 or 5, saved in wide form (one row per age, var_wages1-3).
use "${data}\CPS.dta", clear
keep if inlist(labor_force,4,5)

* Keep hourly wages within [3.35, 180]; missing wages fall outside the range.
keep if inrange(incwage_hourly,3.35,180)
replace incwage_hourly=ln(incwage_hourly)

* Standard deviation of log wages per cell, squared to obtain the variance.
collapse (sd) var_wages=incwage_hourly [aweight=perwt], by(college_alt age)
replace var_wages=var_wages^2

reshape wide var_wages, i(age) j(college_alt)
sort age
save "${data}\Var Wage Profiles2019.dta", replace

}




				

